
*** descriptive analyses
* Figures 2, 3, A2
* Table 2

** dta for Figure 2: fertility pattern by mother's birth year
* Produces one row per (urban, birthy) cell holding the cell means of the
* nchild* variables, for birth years 1940-1960.
* NOTE: -use- accepts -clear-, not -replace-; -replace- makes the command fail.
use ../data/censuses_ind, clear

keep if birthy >= 1940 & birthy <= 1960
* cells are defined by urban, so observations without it cannot be assigned
keep if !missing(urban)
* (mean) is collapse's default statistic; spelled out to match the other sections
collapse (mean) nchild nchild2 nchild3 nchild4 nchild5 nchild6, by(urban birthy)

save ../Rdta/fertility_year_census.dta, replace


** dta for Figure 3 Panel A: education by child birth year
* Produces one row per birth year (1965-1980) with the share of individuals
* whose eduy is at least 6, 9 and 12, among observations with hukou == 0
* (presumably rural hukou, given the output file name -- confirm coding).
use ../data/census2000individual, clear

keep if birthy >= 1965 & birthy <= 1980
keep if hukou == 0

* inspect the distribution of eduy, including missing values
tab eduy, m

* Stata orders missing values above every number, so "eduy >= 6" is true for
* missing eduy. Guard with !missing() so those observations get a missing
* indicator (and are ignored by the means below) instead of being coded 1.
g byte edulev2 = eduy >= 6  if !missing(eduy)
g byte edulev3 = eduy >= 9  if !missing(eduy)
g byte edulev4 = eduy >= 12 if !missing(eduy)

collapse (mean) edulev2 edulev3 edulev4, by(birthy) fast

save ../Rdta/edu_year_census_rural.dta, replace


** dta for Figure 3 Panel B: education levels by child age
* One row per age (6-17) with the mean of each edulev indicator, restricted to
* urban == 0 and birthyP2 in 1940-1960.
use ../data/c8290hh, clear

* all three sample restrictions applied in a single pass
keep if inrange(birthyP2, 1940, 1960) & inrange(age, 6, 17) & urban == 0

collapse (mean) edulev2 edulev3 edulev4, by(age)

save ../Rdta/edu_age_census_rural.dta, replace


** Figure A2: number of teachers in secondary schools (in thousands)
* Sums nteacherSecondary over provinces within each year (1970-2005), rescales
* to thousands, and exports a line plot.
use ../raw/ProvinceData/NewChinaSixtyYears.dta, clear

rename provcode prov
keep prov year nteacherSecondary nstudentSecondary

* report how many provinces are in the data (log output only)
distinct prov

keep if inrange(year, 1970, 2005)

* yearly total across provinces
collapse (sum) nteacherSecondary, by(year)

* express the total in thousands
replace nteacherSecondary = nteacherSecondary / 1000

twoway line nteacherSecondary year, ///
	title("Number of teachers in secondary schools (in thousands)") ///
	xtitle("Year") ///
	ytitle("") ///
	xlabel(1970(5)2005)

graph export ../output/FigureA2_figTeacher.pdf, replace


** construct main regression sample
* Restricts c8290hh to the estimation sample, builds interaction terms,
* province-specific trends, logged controls and de-medianed controls, then
* saves the result as c8290hh_reg_main.

use ../data/c8290hh, clear

* sample restrictions
keep if birthyP2 >= 1940 & birthyP2 <= 1960
keep if age >= 13 & age <= 17
keep if sample2plus == 1
keep if urban == 0
keep if han == 1

drop if age1birthP2 <= 15

* drop observations above the 99th percentile of fine3b. Stata orders missing
* values above every number, so observations with missing fine3b are dropped
* by this line as well.
sum fine3b, d
drop if fine3b > r(p99)

* interactions with the twin-household indicator
g male_twinhh = male * twinhh

* eduyP2 re-centred at 6 (distinct from the median-centred eduyP2_dm below)
g eduyP2dm = eduyP2 - 6
g eduyP2dm_twinhh = eduyP2dm * twinhh

* twinsex: 1 = nmaletwin is 2, 2 = nmaletwin is 0, 3 = nmaletwin is 1;
* left missing when twinhh is 0
g twinsex = 1 if nmaletwin == 2 & twinhh
replace twinsex = 2 if nmaletwin == 0 & twinhh
replace twinsex = 3 if nmaletwin == 1 & twinhh

* province-specific trends, number of distinct prov - 1
* (-tab, g()- creates dummies prov1, prov2, ...; the last one gets no trend terms)
tab prov, g(prov)
distinct prov
local n = `r(ndistinct)' - 1
forval i=1/`n' {
	* linear trend in birthyP2
	g prov`i'_birthyP2 = prov`i' * birthyP2
	* quadratic trend in (birthyP2 - 1940)
	g prov`i'_year2 = prov`i' * (birthyP2 - 1940) * (birthyP2 - 1940)
}

egen prov_edulevP2 = group(prov edulevP2)

	* log (log() returns missing for non-positive values)
foreach var of varlist lossratio CRdratio sdyReceivedRatio minShare gdppc popGrowth {
	gen ln`var' = log(`var')
}

	* de-median fine3b and baseline controls
foreach var of varlist fine3b age2birthP2 age age2 eduyP2 {
	sum `var', d
	g `var'_dm = `var' - r(p50)
	*g `var'_dm = `var' - r(mean)
}

	* de-median additional controls
foreach var of varlist llfExp3b lnlossratio lnCRdratio lnsdyReceivedRatio lnminShare lngdppc lnpopGrowth gdpg popGrowth  {
	sum `var', d
	scalar m = r(p50)
	* scalars and variables share one namespace in expressions, and a bare "m"
	* may be read as a variable name or abbreviation (e.g. male, minShare)
	* rather than the scalar; scalar(m) forces the scalar to be used.
	gen `var'_dm = `var' - scalar(m)
}

save ../data/c8290hh_reg_main, replace


** Table 2: summary statistics
* Runs on the data currently in memory (the regression sample constructed and
* saved just above in this file).

* variables reported in Table 2; the same list is used for every column group
local sumvars edulev3 nchild3 fine3b twinhh male age eduyP2 ageP2 age1birthP2 age2birthP2

* Table 2. Columns (1) and (2)
tabstat `sumvars', s(mean sd n)

* Table 2. Columns (3)--(6)
tabstat `sumvars', s(mean sd n) by(twinhh)

* export Table 2
estimates clear
estpost tabstat `sumvars', by(twinhh) s(mean sd n) columns(statistics) listwise

esttab using ../output/Table2_tabSum.csv, replace ///
	main(Mean) aux(SD) b(4) nostar unstack noobs nonote nomtitle nonumber
* reverse the order of columns when creating Table A2 in the online appendix


